library(tidyverse)
library(dplyr)
library(stringr)
library(readr)
library(stargazer)
library(broom)
library(estimatr)

setwd('')
out <- ('./output')

source('./code/rename_fx.R')

public <- read_csv('./data/Traceability-US Public Survey 2021_December 1, 2021_11.08.csv') %>%
  filter(!row_number() %in% c(1:3))

stacked_data <- public %>%
  mutate(id = ResponseId,
         issue = localvignette1,
         prob_trace = case_when(str_detect(`v1-traceability`, 'difficult to') ~ 'difficult',
                                str_detect(`v1-traceability`, 'easy to') ~ 'easy'),
         simplicity = case_when(str_detect(`v1-solcomplexity`, 'can directly') ~ 'simple',
                                str_detect(`v1-solcomplexity`, 'cannot directly') ~ 'complex'),
         solve_prob = as.numeric(str_remove(`v1-probsolve`, '%')),
         solu_trace = case_when(str_detect(`v1-soltraceability`, 'transparent') ~ 'easy',
                                str_detect(`v1-soltraceability`, 'indirect') ~ 'difficult'),
         cobenefits = case_when(str_detect(`v1-poleffect`, 'increase') ~ 'costs',
                                str_detect(`v1-poleffect`, 'decrease') ~ 'benefits'),
         time = case_when(str_detect(`v1-timetosolve`, 'six months') ~ .5,
                          str_detect(`v1-timetosolve`, 'two years') ~ 2,
                          str_detect(`v1-timetosolve`, 'four years') ~ 4,
                          str_detect(`v1-timetosolve`, 'ten years') ~ 10,
                          str_detect(`v1-timetosolve`, '15 years or more') ~ 15),
         dv_importance = case_when(Q85=='Not at all important' ~ 0,
                                   Q85=='Slightly important' ~ 1,
                                   Q85=='Moderately important' ~ 2,
                                   Q85=='Very important' ~ 3,
                                   Q85=='Extremely important' ~ 4),
         dv_likely = case_when(Q86=='Extremely unlikely' ~ 0,
                               Q86=='Somewhat unlikely' ~ 1,
                               Q86=='Neither likely nor unlikely' ~ 2,
                               Q86=='Somewhat likely' ~ 3,
                               Q86=='Extremely likely' ~ 4),
         dv_priority = case_when(Q87=='Not prioritize at all' ~ 0,
                                 Q87=='Low priority' ~ 1,
                                 Q87=='Moderate priority' ~ 2,
                                 Q87=='High priority' ~ 3,
                                 Q87=='The highest priority' ~ 4),
         dv_approval = case_when(Q123=='Disapprove a great deal' ~ 0,
                                 Q123=='Disapprove a moderate amount' ~ 1,
                                 Q123=='Neither approve nor disapprove' ~ 2,
                                 Q123=='Approve a moderate amount' ~ 3,
                                 Q123=='Approve a great deal' ~ 4)) %>%
  bind_rows(public %>%
              mutate(id = ResponseId,
                     issue = `v2-topicshort`,
                     prob_trace = case_when(str_detect(`v2-probtraceability`, 'difficult to') ~ 'difficult',
                                            str_detect(`v2-probtraceability`, 'easy to') ~ 'easy'),
                     simplicity = case_when(str_detect(`v2-solcomplexity`, 'fixing') ~ 'simple',
                                            str_detect(`v2-solcomplexity`, 'complex') ~ 'complex'),
                     solve_prob = as.numeric(str_remove(`v2-probsolve`, '%')),
                     solu_trace = case_when(str_detect(`v2-soltraceability`, 'difficult to') ~ 'difficult',
                                            str_detect(`v2-soltraceability`, 'easy to') ~ 'easy'),
                     cobenefits = case_when(str_detect(`v2-poleffect`, 'increase') ~ 'costs',
                                            str_detect(`v2-poleffect`, 'decrease') ~ 'benefits'),
                     time = case_when(str_detect(`v2-timetosolve`, 'six months') ~ .5,
                                      str_detect(`v2-timetosolve`, 'two years') ~ 2,
                                      str_detect(`v2-timetosolve`, 'four years') ~ 4,
                                      str_detect(`v2-timetosolve`, 'ten years') ~ 10,
                                      str_detect(`v2-timetosolve`, 'fifteen years or more') ~ 15),
                     dv_importance = case_when(Q124=='Not at all important' ~ 0,
                                               Q124=='Slightly important' ~ 1,
                                               Q124=='Moderately important' ~ 2,
                                               Q124=='Very important' ~ 3,
                                               Q124=='Extremely important' ~ 4),
                     dv_likely = case_when(Q125=='Extremely unlikely' ~ 0,
                                           Q125=='Somewhat unlikely' ~ 1,
                                           Q125=='Neither likely nor unlikely' ~ 2,
                                           Q125=='Somewhat likely' ~ 3,
                                           Q125=='Extremely likely' ~ 4),
                     dv_priority = case_when(Q126=='Not prioritize at all' ~ 0,
                                             Q126=='Low priority' ~ 1,
                                             Q126=='Moderate priority' ~ 2,
                                             Q126=='High priority' ~ 3,
                                             Q126=='The highest priority' ~ 4),
                     dv_approval = case_when(Q127=='Disapprove a great deal' ~ 0,
                                             Q127=='Disapprove a moderate amount' ~ 1,
                                             Q127=='Neither approve nor disapprove' ~ 2,
                                             Q127=='Approve a moderate amount' ~ 3,
                                             Q127=='Approve a great deal' ~ 4))) %>%
  mutate(pid_simple = case_when(str_detect(party, 'Democrat') ~ "D",
                                str_detect(party, 'Republican') ~ "R",
                                str_detect(party, 'Independent') ~ "I"),
         age = as.numeric(age),
         trust_recode = case_when(trust %in% c('Never', 'Some of the time') ~ 'low trust',
                                  trust %in% c('About half of the time', 'Most of the time', 'Always') ~ 'high trust')) %>%
  mutate_at(vars(dv_importance:dv_approval), function(x){x/4}) %>%
  mutate(time = time/15,
         solve_prob = solve_prob/100,
         dv_index = (dv_importance + dv_likely + dv_priority + dv_approval)/4)

# reliability
stacked_data %>%
  ungroup() %>% 
  select(dv_importance, dv_likely, dv_priority, dv_approval) %>%
  psych::alpha()

# work on ways to rescale time and solve_prob -- try categorical variable too
# elite result on traceability (and not public) removes problems of demand effects

reg_data <- stacked_data %>%
  select(issue:dv_index, ResponseId) %>%
  pivot_longer(cols=starts_with('dv_')) %>%
  group_by(name) %>%
  do(tidy(lm_robust(value ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time,
                    fixed_effects = ~ issue + ResponseId, 
                    data = .))) %>%
  mutate(term = vars_rename(term),
         name = vars_rename(name))

reg_data %>%
  mutate(panel = case_when(name=="Index" ~ "Index",
                           T ~ "Individual Variables"),
         name = ordered(name, levels = c("Index", "Importance", 'State Leg.\nApproval', "Likelihood", 'Priority'))) %>%
  ggplot(aes(col = name, shape = name)) + 
  geom_hline(yintercept=0, lty=2) + 
  geom_pointrange(aes(x=term, y=estimate, ymin = (estimate - 1.96*std.error), ymax=(estimate + 1.96*std.error)), position=position_dodge(width=.3)) + 
  geom_pointrange(aes(x=term, y=estimate, ymin = (estimate - 1.65*std.error), ymax=(estimate + 1.65*std.error)), fatten=1.5, size=1.1, position=position_dodge(width=.3)) + 
  coord_flip() +  
  facet_wrap(~panel) + 
  labs(x='', y = '') +
  scale_color_viridis_d(breaks = c("Importance", 'State Leg.\nApproval',"Likelihood", 'Priority')) + 
  scale_shape_discrete(breaks = c("Importance", 'State Leg.\nApproval',"Likelihood", 'Priority')) + 
  theme_classic() +
  theme(legend.position='bottom', legend.title=element_blank()) 
ggsave(paste0(out,'/public_main_index.pdf'), height=6, width=6)

stargazer(reg_data %>% 
            select(DV = name, Coefficient = term, estimate, std.error) %>%
            mutate(estimate = round(estimate, 3),
                   std.error = round(std.error, 3)), 
          summary=F, out = paste0(out, '/public_main.tex'), style='apsr', digits=2, rownames=F, 
          title = '\\textbf{Tabular Presentation of Public Experiment}',
          label = "tab:public_main")


stacked_data %>%
  select(issue:dv_index, pid_simple, age, raceeth, educ_3, ResponseId) %>%
  pivot_longer(cols=starts_with('dv_')) %>%
  group_by(name) %>%
  do(tidy(lm_robust(value ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time + pid_simple + age + educ_3 + raceeth,
                    fixed_effects = ~ issue, 
                    data = .))) %>%
  filter(term %in% c('prob_traceeasy', 'simplicitysimple', 'solve_prob', 'solu_traceeasy', 'cobenefitscosts', 'time')) %>%
  mutate(covar = 'covariates') %>%
  bind_rows(stacked_data %>%
              select(issue:dv_index) %>%
              pivot_longer(cols=starts_with('dv_')) %>%
              group_by(name) %>%
              do(tidy(lm_robust(value ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time,
                                fixed_effects = ~ issue, 
                                data = .))) %>%
              mutate(covar = 'no covariates')) %>%
  mutate(term = vars_rename(term),
         name = vars_rename(name)) %>%
  ggplot() + 
  geom_hline(yintercept=0, lty=2) + 
  geom_pointrange(aes(x=term, y=estimate, ymin = (estimate - 1.96*std.error), ymax=(estimate + 1.96*std.error), col=covar), position = position_dodge(width=.5)) + 
  geom_pointrange(aes(x=term, y=estimate, ymin = (estimate - 1.65*std.error), ymax=(estimate + 1.65*std.error), col=covar), fatten=2, size=1.1, position = position_dodge(width=.5)) + 
  coord_flip() +  
  facet_wrap(~name) + 
  labs(x='', y = '') +
  theme_classic() +
  scale_color_grey() + 
  theme(legend.position='bottom', legend.title=element_blank()) 
ggsave(paste0(out, '/public_covar.pdf'), height=5, width=5)

reg_data <- reg_data <- stacked_data %>%
  select(issue:dv_index, pid_simple, age, raceeth, educ_3, ResponseId) %>%
  pivot_longer(cols=starts_with('dv_')) %>%
  group_by(name) %>%
  do(tidy(lm_robust(value ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time + pid_simple + age + educ_3 + raceeth,
                    fixed_effects = ~ issue, 
                    data = .))) %>%
  #filter(term %in% c('prob_traceeasy', 'simplicitysimple', 'solve_prob', 'solu_traceeasy', 'cobenefitscosts', 'time')) %>% 
  mutate(name = vars_rename(name))
r1 <- lm_robust(dv_importance ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time + pid_simple + age + educ_3 + raceeth,
                fixed_effects = ~ issue, 
                data = stacked_data)
r2 <- lm_robust(dv_approval ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time + pid_simple + age + educ_3 + raceeth,
                fixed_effects = ~ issue, 
                data = stacked_data)
r3 <- lm_robust(dv_likely ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time + pid_simple + age + educ_3 + raceeth,
                fixed_effects = ~ issue, 
                data = stacked_data)
r4 <- lm_robust(dv_priority ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time + pid_simple + age + educ_3 + raceeth,
                fixed_effects = ~ issue, 
                data = stacked_data)
r5 <- lm_robust(dv_index ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time + pid_simple + age + educ_3 + raceeth,
                fixed_effects = ~ issue, 
                data = stacked_data)

texreg(list(r5, r1, r2, r3, r4),
       stars = NULL,
       digits = 3,
       include.ci=F,
       omit.coef = 'Intercept',
       custom.model.names = c("Index", "Importance", "Approval", "Likelihood", "Priority"),
       #custom.coef.names=c('Staff Recommendation', 'Commenter Preference', 'Positive', 'Negative', 'Neutral', 'Time Trend'),
       caption = '\\textbf{Tabular Presentation of Public Experiment -- Covariate Adjusted Models}',
       #groups = list('Comments' = 3:5),
       label = "tab:public_covar",
       #custom.gof.rows = list('Year and Month FE' = c('Y', "Y", "N", "N")),
       include.adjrs = FALSE,
       include.rmse = FALSE,
       file = paste0(out, '/public_covar.tex'))

reg_data <- stacked_data %>%
  select(issue:dv_index, pid_simple) %>%
  pivot_longer(cols=starts_with('dv_')) %>%
  group_by(name, pid_simple) %>%
  filter(pid_simple %in% c("D", "R")) %>%
  do(tidy(lm_robust(value ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time,
                    fixed_effects = ~ issue, 
                    data = .))) %>%
  mutate(term = vars_rename(term),
         name = vars_rename(name))

ggplot(reg_data %>% filter(name=='Index')) + 
  geom_hline(yintercept=0, lty=2) + 
  geom_pointrange(aes(x=term, y=estimate, ymin = (estimate - 1.96*std.error), ymax=(estimate + 1.96*std.error), col = pid_simple), position = position_dodge(width=.5)) + 
  geom_pointrange(aes(x=term, y=estimate, ymin = (estimate - 1.65*std.error), ymax=(estimate + 1.65*std.error), col = pid_simple), fatten=2, size=1.1, position = position_dodge(width=.5)) + 
  coord_flip() +  
  #facet_wrap(~name) + 
  labs(x='', y = paste0('Effect of Policy Attributes')) +
  theme_classic() +
  scale_color_manual(values = c("D" = 'Blue', "R" = 'Red')) + 
  theme(legend.position='bottom', legend.title=element_blank()) 
ggsave(paste0(out,'/public_pid.pdf'), height=5, width=5)

reg_data <- stacked_data %>%
  select(issue:dv_index, trust_recode) %>%
  pivot_longer(cols=starts_with('dv_')) %>%
  group_by(name, trust_recode) %>%
  do(tidy(lm_robust(value ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time,
                    fixed_effects = ~ issue, 
                    data = .))) %>%
  filter(!is.na(trust_recode)) %>%
  mutate(term = vars_rename(term),
         name = vars_rename(name))
ggplot(reg_data %>% filter(name=="Index")) + 
  geom_hline(yintercept=0, lty=2) + 
  geom_pointrange(aes(x=term, y=estimate, ymin = (estimate - 1.96*std.error), ymax=(estimate + 1.96*std.error), col = trust_recode), position = position_dodge(width=.5)) + 
  geom_pointrange(aes(x=term, y=estimate, ymin = (estimate - 1.65*std.error), ymax=(estimate + 1.65*std.error), col = trust_recode), fatten=2, size=1.1, position = position_dodge(width=.5)) + 
  coord_flip() +  
  #facet_wrap(~name) + 
  labs(x='', y = '') +
  theme_classic() +
  scale_color_grey() + 
  theme(legend.position='bottom', legend.title=element_blank()) 
ggsave(paste0(out,'/public_trust.pdf'), height=5, width=5)

stargazer(reg_data %>% 
            select(DV = name, Trust = trust_recode, Coefficient = term, estimate, std.error) %>%
            mutate(estimate = round(estimate, 3),
                   std.error = round(std.error, 3)), 
          summary=F, out = paste0(out, '/public_trust.tex'), style='apsr', digits=2, rownames=F, 
          title = '\\textbf{Tabular Presentation of Public Experiment by Trust in Government}',
          label = "tab:public_trust")

reg_data <- stacked_data %>%
  select(issue:dv_index, trust_recode) %>%
  pivot_longer(cols=starts_with('dv_')) %>%
  filter(!is.na(issue)) %>%
  group_by(name, issue) %>%
  do(tidy(lm_robust(value ~ prob_trace + simplicity + solve_prob + solu_trace + cobenefits + time,
                    data = .))) %>%
  filter(term!='(Intercept)') %>%
  mutate(term = vars_rename(term),
         name = vars_rename(name))
ggplot(reg_data %>% filter(name=='Index')) + 
  geom_hline(yintercept=0, lty=2) + 
  geom_pointrange(aes(x=term, y=estimate, ymin = (estimate - 1.96*std.error), ymax=(estimate + 1.96*std.error)), position = position_dodge(width=.5)) + 
  geom_pointrange(aes(x=term, y=estimate, ymin = (estimate - 1.65*std.error), ymax=(estimate + 1.65*std.error)), fatten=2, size=1.1, position = position_dodge(width=.5)) + 
  coord_flip() +  
  facet_wrap(~issue) + 
  labs(x='', y = paste0('Effect of Policy Attributes')) +
  theme_classic() +
  scale_y_continuous(breaks=c(-.2,0,.2)) + 
  theme(legend.position='bottom', legend.title=element_blank())
ggsave(paste0(out, '/public_issue.pdf'), width=5, height=5)

stargazer(reg_data %>% 
            select(issue, DV = name, Coefficient = term, estimate, std.error) %>%
            mutate(estimate = round(estimate, 3),
                   std.error = round(std.error, 3)), 
          summary=F, out = paste0(out, '/public_issue.tex'), style='apsr', digits=2, rownames=F, 
          title = '\\textbf{Tabular Presentation of Public Experiment by Issue}',
          label = "tab:public_issue")

# order effects -- cant evaluate by randomized

# demographics
public %>%
  filter(Consent=="Yes") %>%
  group_by(raceeth) %>%
  summarise(n=n()) %>%
  mutate(pct = n/sum(n)) %>%
  filter(!is.na(raceeth)) %>%
  filter(!raceeth %in% c("Other", "Prefer not to answer")) %>% 
  rename(name = raceeth) %>%
  mutate(name = paste0('Race: ', name)) %>%
  bind_rows(public %>%
              mutate(pid_simple = case_when(str_detect(party, 'Democrat') ~ "Democrat",
                                            str_detect(party, 'Republican') ~ "Republican",
                                            str_detect(party, 'Independent') ~ "Independent")) %>%
              group_by(pid_simple) %>%
              summarise(n=n()) %>%
              mutate(pct = n/sum(n)) %>%
              filter(!is.na(pid_simple))%>%
              rename(name = pid_simple) %>%
              mutate(name = paste0("Party: ", name))) %>%
  bind_rows(public %>%
              group_by(educ_3) %>%
              summarise(n=n()) %>%
              mutate(pct = n/sum(n)) %>%
              filter(!is.na(educ_3))%>%
              filter(!educ_3 %in% c('Prefer not to answer', "Don't know")) %>%
              rename(name = educ_3) %>%
              mutate(name = paste0("Education: ", name))) %>%
  select(-n) %>%
  bind_rows(public %>%
              summarise(pct = sum(gender...71=='Female', na.rm=T)/n()) %>%
              mutate(name='Gender: Female')) %>%
  bind_rows(public %>%
              summarise(pct = median(as.numeric(age))) %>%
              mutate(name='Median Age')) %>%
  mutate(pct = round(pct,2)) -> demo_table

colnames(demo_table) <- c('Variable', 'Sample Prop.')

stargazer(demo_table, summary=F, out = paste0(out, '/demo_public.tex'), style='apsr', digits=2, rownames=F, 
          title = '\\textbf{Demographic Characteristics of Public Sample}',
          label = "tab:public_demo")


            